from modelscope import AutoModelForCausalLM, AutoTokenizer

model_name = 'Qwen/Qwen3-1.7B'
# Download (on first run) and load the tokenizer and model weights.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Loads on CPU in full precision by default; pass torch_dtype='auto' and
# device_map='auto' (requires accelerate) to place the model on a GPU.
model = AutoModelForCausalLM.from_pretrained(model_name)
prompt = 'How is the weather today?'

# Wrap the prompt as a single-turn chat and render it with the model's
# chat template. enable_thinking=True lets Qwen3 emit a <think>...</think>
# reasoning block before its final answer (this is the default for Qwen3).
messages = [
    {'role': 'user', 'content': prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True,
)

# Tokenize and move the inputs to the same device as the model.
model_inputs = tokenizer([text], return_tensors='pt').to(model.device)
# 256 new tokens is tight for thinking mode; raise it if output is cut off.
generated_ids = model.generate(**model_inputs, max_new_tokens=256)
# Slice off the prompt tokens so only the newly generated text is decoded.
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
content = tokenizer.decode(output_ids, skip_special_tokens=True)
print(content)
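
# Optional: separate the <think> reasoning block from the final reply.
# A minimal sketch following the parsing approach in the Qwen3 model card;
# it assumes '</think>' is a single token in the tokenizer's vocabulary
# (verify with tokenizer.convert_tokens_to_ids('</think>')).
end_think_id = tokenizer.convert_tokens_to_ids('</think>')
ids = output_ids.tolist()
try:
    # Index just past the last </think>; everything before it is reasoning.
    split = len(ids) - ids[::-1].index(end_think_id)
except ValueError:
    split = 0  # no reasoning block was emitted
thinking = tokenizer.decode(ids[:split], skip_special_tokens=True).strip()
answer = tokenizer.decode(ids[split:], skip_special_tokens=True).strip()
print('thinking:', thinking)
print('answer:', answer)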
